It is often a necessity to compare some sequences to find out how similar they are. There are many similarity measures that can be used, e.g., longest common subsequence, edit distance, sequence alignment. Recently a merged longest common subsequence (MergedLCS) problem was formulated with applications in bioinformatics. We propose the bit-parallel algorithms for the MergedLCS problem and evaluate them in practice showing that they are usually tens times faster than the already published methods.
int MLCS_Bitpar(vector< int> T, vector< int> A, vector< int> B, int Tlen, int Alen, int Blen, int size, int WordSize) { vector< vector< uint64_t> > W1(Blen + 1, vector< uint64_t>(ceil((Tlen + 1) / WordSize + 1))); vector< vector< uint64_t> > W2(Blen + 1, vector< uint64_t>(ceil((Tlen + 1) / WordSize + 1))); vector< vector< uint64_t> > Y(size, vector< uint64_t>(ceil((Tlen + 1) / WordSize + 1), 0)); uint64_t carrybitW = 0; uint64_t MaxNUM64 = 0XFFFFFFFFFFFFFFFF;//2^64 - 1 uint64_t modLastWord = 0X0000020000000000;//2^42 //uint64_t modLastWord = 0X0000000000000010; int WordNum = ceil((Tlen + 1) / WordSize + 1) - 1; //Constructing array of Y uint64_t shiftNum = 1; for (int i = 1; i <= min(Tlen, WordSize - 1); ++i) { shiftNum = shiftNum << 1; Y[T[i] - 1][0] |= shiftNum; } if (Tlen > 63) for (int i = WordSize; i <= Tlen; ++i) { shiftNum = shiftNum << 1; if (i % WordSize == 0) shiftNum = 1; Y[T[i] - 1][floor(i / WordSize)] |= shiftNum; } //Initialisation for (int i = 0; i <= WordNum; ++i) { W2[0][i] = MaxNUM64; } W2[0][WordNum] %= modLastWord; //Calculating boundaries uint64_t U = 0, tmpa = 0; for (int k = 1; k <= Blen; ++k) { carrybitW = 0; for (int i = 0; i <= WordNum - 1; ++i) { tmpa = W2[k - 1][i]; U = tmpa & Y[B[k] - 1][i]; W2[k][i] = (tmpa + U + carrybitW) | (W2[k - 1][i] - U); //detect overflow if (U > MaxNUM64 - tmpa) carrybitW = 1; else carrybitW = 0; } tmpa = W2[k - 1][WordNum]; U = tmpa& Y[B[k] - 1][WordNum]; W2[k][WordNum] = ((tmpa + U + carrybitW) % modLastWord) | (tmpa - U); //detect overflow if (U > MaxNUM64 - tmpa) carrybitW = 1; else carrybitW = 0; } int layer = 1; while (1) { if (layer % 2 == 1) { carrybitW = 0; for (int i = 0; i <= WordNum - 1; ++i) { tmpa = W2[0][i]; U = tmpa & Y[A[layer] - 1][i]; W1[0][i] = (tmpa + U + carrybitW) | (tmpa - U); //detect overflow if (U > MaxNUM64 - tmpa) carrybitW = 1; else carrybitW = 0; } tmpa = W2[0][WordNum]; U = tmpa & Y[A[layer] - 1][WordNum]; W1[0][WordNum] = ((tmpa + U + carrybitW) % modLastWord) | (tmpa - U); //detect overflow if (U > MaxNUM64 - tmpa) carrybitW = 1; else carrybitW = 0; //Main calculations uint64_t Up = 0, Upp = 0, Wp = 0, Wpp = 0, Ut = 0, Wt = 0, Vt = 0, carrybitWp = 0, carrybitWpp = 0; int tmpv = ceil(log2(WordSize)) - 1; for (int k = 1; k <= Blen; ++k) { carrybitWp = 0, carrybitWpp = 0; bool f = false; for (int i = 0; i <= WordNum - 1; ++i) { tmpa = W2[k][i]; Up = tmpa & Y[A[layer] - 1][i]; Wp = (tmpa + Up + carrybitWp) | (tmpa - Up); if (Up > MaxNUM64 - tmpa) carrybitWp = 1; else carrybitWp = 0; tmpa = W1[k - 1][i]; Upp = tmpa & Y[B[k] - 1][i]; Wpp = (tmpa + Upp + carrybitWpp) | (tmpa - Upp); if (Upp > MaxNUM64 - tmpa) carrybitWpp = 1; else carrybitWpp = 0; Ut = Wp | Wpp; Wt = Wp ^ Wpp; Vt = Wt; for (int ipp = 0; ipp <= tmpv; ++ipp) { Vt = Vt ^ (Vt << (1 << ipp)); } if (f == true) Vt = ~Vt; if ((Vt & 0X8000000000000000) > 0) f = true; else f = false; W1[k][i] = ~(Wt & Vt) & Ut; } tmpa = W2[k][WordNum]; Up = tmpa & Y[A[layer] - 1][WordNum]; Wp = ((tmpa + Up + carrybitWp) % modLastWord) | (tmpa - Up); if (Up > MaxNUM64 - tmpa) carrybitWp = 1; else carrybitWp = 0; tmpa = W1[k - 1][WordNum]; Upp = tmpa & Y[B[k] - 1][WordNum]; Wpp = ((tmpa + Upp + carrybitWpp) % modLastWord) | (tmpa - Upp); if (Upp > MaxNUM64 - tmpa) carrybitWpp = 1; else carrybitWpp = 0; Ut = Wp | Wpp; Wt = Wp ^ Wpp; Vt = Wt; for (int ipp = 0; ipp <= tmpv; ++ipp) { Vt = Vt ^ (Vt << (1 << ipp)); } if (f == true) Vt = ~Vt; if ((Vt & 0X8000000000000000) > 0) f = true; else f = false; W1[k][WordNum] = ~(Wt & Vt) & Ut; } if (layer == Alen) { int z = 0; uint64_t Vz = 0; for (int i = 0; i <= WordNum - 1; ++i) { Vz = ~W1[Blen][i]; while (Vz != 0) { Vz = Vz & (Vz - 1); ++z; } } Vz = (~W1[Blen][WordNum]) % modLastWord; while (Vz != 0) { Vz = Vz & (Vz - 1); ++z; } return z; } ++layer; } else { carrybitW = 0; for (int i = 0; i <= WordNum - 1; ++i) { tmpa = W1[0][i]; U = tmpa & Y[A[layer] - 1][i]; W2[0][i] = (tmpa + U + carrybitW) | (tmpa - U); //detect overflow if (U > MaxNUM64 - tmpa) carrybitW = 1; else carrybitW = 0; } tmpa = W1[0][WordNum]; U = tmpa & Y[A[layer] - 1][WordNum]; W2[0][WordNum] = ((tmpa + U + carrybitW) % modLastWord) | (tmpa - U); //detect overflow if (U > MaxNUM64 - tmpa) carrybitW = 1; else carrybitW = 0; //Main calculations uint64_t Up = 0, Upp = 0, Wp = 0, Wpp = 0, Ut = 0, Wt = 0, Vt = 0, carrybitWp = 0, carrybitWpp = 0; int tmpv = ceil(log2(WordSize)) - 1; for (int k = 1; k <= Blen; ++k) { carrybitWp = 0, carrybitWpp = 0; bool f = false; for (int i = 0; i <= WordNum - 1; ++i) { tmpa = W1[k][i]; Up = tmpa & Y[A[layer] - 1][i]; Wp = (tmpa + Up + carrybitWp) | (tmpa - Up); if (Up > MaxNUM64 - tmpa) carrybitWp = 1; else carrybitWp = 0; tmpa = W2[k - 1][i]; Upp = tmpa & Y[B[k] - 1][i]; Wpp = (tmpa + Upp + carrybitWpp) | (tmpa - Upp); if (Upp > MaxNUM64 - tmpa) carrybitWpp = 1; else carrybitWpp = 0; Ut = Wp | Wpp; Wt = Wp ^ Wpp; Vt = Wt; for (int ipp = 0; ipp <= tmpv; ++ipp) { Vt = Vt ^ (Vt << (1 << ipp)); } if (f == true) Vt = ~Vt; if ((Vt & 0X8000000000000000) > 0) f = true; else f = false; W2[k][i] = ~(Wt & Vt) & Ut; } tmpa = W1[k][WordNum]; Up = tmpa & Y[A[layer] - 1][WordNum]; Wp = ((tmpa + Up + carrybitWp) % modLastWord) | (tmpa - Up); if (Up > MaxNUM64 - tmpa) carrybitWp = 1; else carrybitWp = 0; tmpa = W2[k - 1][WordNum]; Upp = tmpa & Y[B[k] - 1][WordNum]; Wpp = ((tmpa + Upp + carrybitWpp) % modLastWord) | (tmpa - Upp); if (Upp > MaxNUM64 - tmpa) carrybitWpp = 1; else carrybitWpp = 0; Ut = Wp | Wpp; Wt = Wp ^ Wpp; Vt = Wt; for (int ipp = 0; ipp <= tmpv; ++ipp) { Vt = Vt ^ (Vt << (1 << ipp)); } if (f == true) Vt = ~Vt; if ((Vt & 0X8000000000000000) > 0) f = true; else f = false; W2[k][WordNum] = ~(Wt & Vt) & Ut; } if (layer == Alen) { int z = 0; uint64_t Vz = 0; for (int i = 0; i <= WordNum - 1; ++i) { Vz = ~W2[Blen][i]; while (Vz != 0) { Vz = Vz & (Vz - 1); ++z; } } Vz = (~W2[Blen][WordNum]) % modLastWord; while (Vz != 0) { Vz = Vz & (Vz - 1); ++z; } return z; } ++layer; } } }